Return to Homepage

Return to Data Visualisation Section


Description of the Data


The data comes from the Tidy Tuesday project launched by the R for Data Science team. It contains state-level salary information on registered nurses over the period 1998 to 2020.


Data Cleaning and Exploration


As the data already consists of aggregated statistics, no further data cleaning or aggregation is needed. As this post is part of the data visualisation section, let’s get straight to the plots.


Plot 1: Development Of Average Salaries Over Time


# Plot 1: yearly mean of the average salary as a line, with a shaded ribbon
# spanning the yearly means of the 10th and 90th salary percentiles.
nurses %>%
  group_by(year) %>%
  summarise(
    salary_p10 = mean(annual.10th.percentile, na.rm = TRUE),
    salary_p90 = mean(annual.90th.percentile, na.rm = TRUE),
    salary_mean = mean(annual.salary.avg, na.rm = TRUE)
  ) %>%
  ggplot(mapping = aes(x = year, y = salary_mean)) +
  geom_line(colour = "dodgerblue") +
  geom_ribbon(
    mapping = aes(ymin = salary_p10, ymax = salary_p90),
    linetype = "dotted",
    alpha = 0.4,
    colour = "dodgerblue",
    fill = "dodgerblue"
  ) +
  # Make sure the y axis includes zero.
  expand_limits(y = 0) +
  scale_y_continuous(labels = scales::dollar_format()) +
  # Print years as plain integers, without a thousands separator.
  scale_x_continuous(
    labels = scales::number_format(big.mark = "", accuracy = 1)
  ) +
  labs(
    x = NULL,
    y = "Salary",
    title = "Average Salaries Over Time",
    subtitle = "Confidence Bands showing 10th and 90th percentiles"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank()
  )


Plot 2: Growth Rates In Mean Salary


# Plot 2: year-over-year growth of the yearly mean of the average salary,
# drawn as one bar per year.
nurses %>%
  group_by(year) %>%
  summarise(mean_salary = mean(annual.salary.avg, na.rm = TRUE)) %>%
  # Relative change versus the previous row (summarise() returns one row per
  # year, sorted by year).
  mutate(change = mean_salary / lag(mean_salary) - 1) %>%
  # The first year has no predecessor, so its change is NA and is dropped.
  filter(!is.na(change)) %>%
  ggplot(aes(year, change)) +
  geom_col(fill = "dodgerblue") +
  labs(title = "Yearly Salary Growth For Registered Nurses",
       subtitle = "Growth calculated on average salaries across states",
       x = NULL,
       y = "YoY Change in Salaries") +
  # Print years as plain integers, without a thousands separator.
  scale_x_continuous(labels = scales::number_format(accuracy = 1,
                                                    big.mark = "")) +
  scale_y_continuous(labels = scales::percent_format()) +
  expand_limits(y = 0) +
  theme_bw() +
  theme(panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank(),
        plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"))


Plot 3: Healthcare Employment Growth Compared To Aggregate Employment Growth


# Plot 3: year-over-year employment growth, healthcare versus the aggregate
# economy, drawn as side-by-side bars per year.
nurses %>%
  group_by(year) %>%
  # NOTE(review): presumably the national aggregates are repeated on every
  # state row, so the mean just recovers the yearly value -- verify.
  summarise(
    total_employment = mean(total.employed..national._aggregate,
                            na.rm = TRUE),
    total_employment_healthcare = mean(
      total.employed..healthcare..national._aggregate,
      na.rm = TRUE
    )
  ) %>%
  # Relative change versus the previous row (one row per year, sorted by year).
  mutate(
    total_growth = total_employment / lag(total_employment) - 1,
    healthcare_growth =
      total_employment_healthcare / lag(total_employment_healthcare) - 1
  ) %>%
  select(year, total_growth, healthcare_growth) %>%
  # The first year has no predecessor, so its growth is NA and is dropped.
  filter(!is.na(total_growth)) %>%
  # Rename to the labels that should appear in the legend.
  rename("Healthcare" = healthcare_growth,
         "Aggregate Economy" = total_growth) %>%
  pivot_longer(-year, names_to = "name", values_to = "value") %>%
  ggplot(aes(year, value, fill = name)) +
  geom_col(position = "dodge") +
  labs(title = "YoY Employment Growth: Healthcare vs. Aggregate",
       subtitle = "Growth calculated on aggregate values",
       x = NULL,
       y = "YoY Change in Employment",
       fill = NULL) +
  # Print years as plain integers, without a thousands separator.
  scale_x_continuous(labels = scales::number_format(accuracy = 1,
                                                    big.mark = "")) +
  scale_y_continuous(labels = scales::percent_format()) +
  # Named values pin each colour to its category instead of relying on the
  # alphabetical order of the labels.
  scale_fill_manual(values = c("Aggregate Economy" = "midnightblue",
                               "Healthcare" = "firebrick")) +
  expand_limits(y = 0) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"))


Plot 4: Development Of Total Registered Nurses


# Plot 4: total number of employed registered nurses per year (sum over all
# rows of that year), drawn as a line with point markers.
nurses %>%
  group_by(year) %>%
  summarise(nurses = sum(total.employed.rn, na.rm = TRUE)) %>%
  ggplot(aes(year, nurses)) +
  geom_line(colour = "dodgerblue") +
  geom_point(colour = "dodgerblue") +
  # Make sure the y axis includes zero.
  expand_limits(y = 0) +
  labs(title = "Total Employed Registered Nurses In The US",
       subtitle = "Data from Data.World",
       x = NULL,
       y = NULL) +
  # Print years as plain integers, without a thousands separator.
  scale_x_continuous(labels = scales::number_format(accuracy = 1,
                                                    big.mark = "")) +
  scale_y_continuous(labels = scales::comma_format()) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"))


Plot 5: Latest Median Salary By State


# Plot 5: median salary per state for the latest year in the data, drawn as
# horizontal bars sorted by salary.
nurses %>%
  # Use the most recent year present rather than a hard-coded one, so the
  # plot stays "latest" if newer data is added.
  filter(year == max(year, na.rm = TRUE)) %>%
  select(state, annual.salary.median) %>%
  # Order the states by median salary so the bars appear sorted.
  mutate(state = fct_reorder(state, annual.salary.median)) %>%
  ggplot(aes(annual.salary.median, state)) +
  geom_col(fill = "dodgerblue", colour = "white") +
  labs(title = "Nurse Salaries By US States",
       subtitle = "Data from Data.World",
       x = "Median Salary",
       y = NULL) +
  scale_x_continuous(labels = scales::dollar_format()) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"))

 

A work by Mathias Steilen